import json
import os


def filter_cn_areas_with_progress(input_file, output_file) -> None:
    """Copy every record whose ``country`` is ``'CN'`` into a new file.

    The input is read as UTF-8 text with one JSON document per line. Each
    line that parses to a JSON object with ``"country": "CN"`` is written
    to the output verbatim (the original line text, not a re-serialization).
    Lines that are not valid JSON, or that parse to something other than an
    object, are counted as processed but not copied.

    Progress is printed every 1000 input lines, followed by a summary with
    the number of lines processed, the number of records kept, and the size
    of the output file in MB.

    Args:
        input_file: Path of the line-delimited JSON file to read.
        output_file: Path of the file to write. It is opened in write mode,
            so any existing content is replaced.

    Raises:
        OSError: If either file cannot be opened, read, or written.
    """
    # First pass: count lines so progress can be shown against a total.
    # The handle is closed by the ``with`` block instead of being leaked.
    with open(input_file, 'r', encoding='utf-8') as counting_handle:
        total_lines = sum(1 for _ in counting_handle)
    print(f"总共约 {total_lines} 条记录")

    count = 0
    processed = 0  # stays 0 when the input file is empty

    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:

        for processed, line in enumerate(infile, start=1):
            try:
                data = json.loads(line.strip())
            except json.JSONDecodeError:
                # Malformed (or blank) line: skip it, but keep counting.
                data = None

            # Only JSON objects have a ``country`` key; lists, strings,
            # numbers and null are valid JSON but are not records.
            if isinstance(data, dict) and data.get('country') == 'CN':
                outfile.write(line)
                count += 1

            # Report progress every 1000 lines, whether or not the current
            # line could be parsed.
            if processed % 1000 == 0:
                print(f"已处理 {processed}/{total_lines} 条记录，找到 {count} 条CN记录")

    print("过滤完成！")
    print(f"总处理记录数: {processed}")
    print(f"保留CN记录数: {count}")
    # Measured after the ``with`` block so the output is flushed and closed.
    print(f"输出文件大小: {os.path.getsize(output_file) / (1024 * 1024):.2f} MB")

# Example usage: runs at module level, reading data/DjiAreas4.json and
# writing the filtered result to data/DjiAreas4_CN.json.
filter_cn_areas_with_progress(
    input_file='data/DjiAreas4.json',
    output_file='data/DjiAreas4_CN.json',
)
